
** build irs migration files
if 1 {
	
	* Set data year list
	local ls_datayr 0506 0607 0708 0809 0910 1011 1112 1213 1314 1415 1516 1617 1718 1819 
	
	* Loop over datayr 
	foreach datayr in `ls_datayr' {
		
		noisily di "*** Building IRS county-to-county migration flows for: `datayr'..."
		
		** point to folder that contains 7z.exe
		local 7z "C:/Program Files/7-Zip"
		cd "$Rep_smokelabor"
		
		* For early years, append state-segments
		if ("`datayr'"=="0506"|"`datayr'"=="0607"|"`datayr'"=="0708") {
			* Unzip raw files
			shell "`7z'/7z.exe" e ./1_build/irs/raw/county`datayr'.zip -o./1_build/irs/proc
			
			* Append all inflow xls
				
				if ("`datayr'"=="0506"|"`datayr'"=="0607") {
					local ifiles : dir "$Rep_smokelabor/1_build/irs/proc" files "*i.xls"
				}
				else if ("`datayr'"=="0708") {
					local ifiles : dir "$Rep_smokelabor/1_build/irs/proc" files "co`datayr'i*.xls"
				}
				
				* Loop over xls state segment
				foreach file in `ifiles' {
					
					import excel using "$Rep_smokelabor/1_build/irs/proc/`file'", allstring clear cellrange(A8)
					
					* Check desired location 
					assert A=="FIPS Code" if _n==1
					drop if _n==1
					
					* Check desired number of columns
					confirm variable A B C D E F G H I
					cap confirm variable A B C D E F G H I J
					if !_rc {
						* If there are other columns, check they are empty
						cap assert missing(J)
						if _rc {
							assert 1==0
							noisily di " !!! Unexpected columns in excel, file: `file'..."
						}
					}
					
					* Drop empty rows
					drop if missing(A)
					
					* Destring numerical rows
					foreach var in G H I {
						replace `var'="." if `var'=="d"
						destring `var', replace
					}
					
					gen countyfip = A+B
					assert strlen(countyfip)==5
					
					* Check an obs is a county-county pair
					bys countyfip C D: assert _N==1
					
					* Aggregate in-flows to county lvl
					collapse (sum) G H I, by(countyfip)
					
					rename (G H I) (irsmove_INreturn irsmove_INexempt irsmove_INagi)
					
					compress
					saveold "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_`file'_inflow.dta", replace
					
					erase "$Rep_smokelabor/1_build/irs/proc/`file'"
				}
				*** End of INFLOW state-xls loop
			
			* Append all outflow xls
				
				if ("`datayr'"=="0506"|"`datayr'"=="0607") {
					local ofiles : dir "$Rep_smokelabor/1_build/irs/proc" files "*o.xls"
				}
				else if ("`datayr'"=="0708") {
					local ofiles : dir "$Rep_smokelabor/1_build/irs/proc" files "co`datayr'o*.xls"
				}
				
				* Loop over xls state segment
				foreach file in `ofiles' {
					
					import excel using "$Rep_smokelabor/1_build/irs/proc/`file'", allstring clear cellrange(A8)
					
					* Check desired location 
					assert A=="FIPS Code" if _n==1
					drop if _n==1
					
					* Check desired number of columns
					confirm variable A B C D E F G H I
					cap confirm variable A B C D E F G H I J
					if !_rc {
						* If there are other columns, check they are empty
						cap assert missing(J)
						if _rc {
							assert 1==0
							noisily di " !!! Unexpected columns in excel, file: `file'..."
						}
					}
					
					* Drop empty rows
					drop if missing(A)
					
					* Destring numerical rows
					foreach var in G H I {
						replace `var'="." if `var'=="d"
						destring `var', replace
					}
					
					gen countyfip = A+B
					assert strlen(countyfip)==5
					
					* Check an obs is a county-county pair
					bys countyfip C D: assert _N==1
					
					* Aggregate in-flows to county lvl
					collapse (sum) G H I, by(countyfip)
					
					rename (G H I) (irsmove_OUTreturn irsmove_OUTexempt irsmove_OUTagi)
					
					compress
					saveold "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_`file'_outflow.dta", replace
					
					erase "$Rep_smokelabor/1_build/irs/proc/`file'"
				}
				*** End of OUTFLOW state-xls loop
				
				* Append files
				foreach param in inflow outflow {
					clear
					local ifiles : dir "$Rep_smokelabor/1_build/irs/proc" files "*`param'.dta"
					foreach file in `ifiles' {
						append using "$Rep_smokelabor/1_build/irs/proc/`file'"
						erase "$Rep_smokelabor/1_build/irs/proc/`file'"
					}
					bys countyfip: assert _N==1
					saveold "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_`param'.dta", replace
				}
				
				* Merge in- and out-flow files
				use "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_inflow.dta", clear
				merge 1:1 countyfip using "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_outflow.dta", nogen
				
				bys countyfip: assert _N==1
				destring countyfip, replace
				
				gen rfrnc_yr = "`datayr'"
				
				lab var countyfip "FIPs county"
				lab var irsmove_INreturn "IRS migration: number of in-flow tax returns (counts)"
				lab var irsmove_INexempt "IRS migration: number of in-flow tax exemptions (persons)"
				lab var irsmove_INagi    "IRS migration: number of in-flow adjusted gross income ($1000)"
				lab var irsmove_OUTreturn "IRS migration: number of out-flow tax returns (counts)"
				lab var irsmove_OUTexempt "IRS migration: number of out-flow tax exemptions (persons)"
				lab var irsmove_OUTagi    "IRS migration: number of out-flow adjusted gross income ($1000)"
				
				compress
				saveold "$Rep_smokelabor/1_build/irs/proc/irs_inflow_outflow_county_annual_`datayr'.dta", replace
				
				* Clean up
				if ("`datayr'"=="0506"|"`datayr'"=="0607") {
					erase "$Rep_smokelabor/1_build/irs/proc/countyin`datayr'.dat"
					erase "$Rep_smokelabor/1_build/irs/proc/countyout`datayr'.dat"
				}
				else if ("`datayr'"=="0708") {
					erase "$Rep_smokelabor/1_build/irs/proc/ci`datayr'us.dat"
					erase "$Rep_smokelabor/1_build/irs/proc/co`datayr'us.dat"
				}
				
				erase "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_inflow.dta"
				erase "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_outflow.dta"
		}
		* Starting 0809 file, single csv is available
		else if ("`datayr'"=="0809"|"`datayr'"=="0910"|"`datayr'"=="1011") {
			
			* Unzip raw files
			shell "`7z'/7z.exe" e ./1_build/irs/raw/county`datayr'.zip -o./1_build/irs/proc
			
			foreach param in inflow outflow {
				
				if "`param'"=="inflow" {
					local inout IN
				}
				else if "`param'"=="outflow" {
					local inout OUT
				}
				
				insheet using "$Rep_smokelabor/1_build/irs/proc/county`param'`datayr'.csv", clear
				
				* Replace missing code "-1" with missing
				* 	These corresponds to records with < 10 underlying returns
				foreach var in return_num exmpt_num aggr_agi {
					replace `var'=. if `var'==-1
				}
				
				gen countyfip = state_code_dest*1000 + county_code_dest
				collapse (sum) irsmove_`inout'return=return_num irsmove_`inout'exempt=exmpt_num irsmove_`inout'agi=aggr_agi , by(countyfip)
				
				bys countyfip: assert _N==1
				saveold "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_`param'.dta", replace
			}
			*** End of in- / out-flow loop
			
			
			* Merge in- and out-flow files
			use "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_inflow.dta", clear
			merge 1:1 countyfip using "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_outflow.dta", nogen
			
			bys countyfip: assert _N==1
			destring countyfip, replace
			
			gen rfrnc_yr = "`datayr'"
			
			lab var countyfip "FIPs county"
			lab var irsmove_INreturn "IRS migration: number of in-flow tax returns (counts)"
			lab var irsmove_INexempt "IRS migration: number of in-flow tax exemptions (persons)"
			lab var irsmove_INagi    "IRS migration: number of in-flow adjusted gross income ($1000)"
			lab var irsmove_OUTreturn "IRS migration: number of out-flow tax returns (counts)"
			lab var irsmove_OUTexempt "IRS migration: number of out-flow tax exemptions (persons)"
			lab var irsmove_OUTagi    "IRS migration: number of out-flow adjusted gross income ($1000)"
			
			compress
			saveold "$Rep_smokelabor/1_build/irs/proc/irs_inflow_outflow_county_annual_`datayr'.dta", replace
			
			* Clean up
			local dfiles : dir "$Rep_smokelabor/1_build/irs/proc" files "*.xls"
			foreach file in `dfiles' {
				erase "$Rep_smokelabor/1_build/irs/proc/`file'"
			}
			erase "$Rep_smokelabor/1_build/irs/proc/countyinflow`datayr'.csv"
			erase "$Rep_smokelabor/1_build/irs/proc/countyoutflow`datayr'.csv"
			erase "$Rep_smokelabor/1_build/irs/proc/countyinflow`datayr'.dat"
			erase "$Rep_smokelabor/1_build/irs/proc/countyoutflow`datayr'.dat"
			erase "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_inflow.dta"
			erase "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_outflow.dta"
		}
		
		else if ("`datayr'"=="1112"|"`datayr'"=="1213"|"`datayr'"=="1314"|"`datayr'"=="1415"|"`datayr'"=="1516"|"`datayr'"=="1617"|"`datayr'"=="1718"|"`datayr'"=="1819") {
			* Unzip raw files
			shell "`7z'/7z.exe" e ./1_build/irs/raw/`datayr'migrationdata.zip -o./1_build/irs/proc
			
			* Import pooled csv file
			foreach param in inflow outflow {
				
				if "`param'"=="inflow" {
					local inout IN
				}
				else if "`param'"=="outflow" {
					local inout OUT
				}
				
				insheet using "$Rep_smokelabor/1_build/irs/proc/county`param'`datayr'.csv", clear
				
				* Replace missing code "-1" with missing
				* 	These corresponds to records with < 10 underlying returns
				foreach var in n1 n2 agi {
					replace `var'=. if `var'==-1
				}
				
				gen countyfip = y2_statefips*1000 + y2_countyfips
				collapse (sum) irsmove_`inout'return=n1 irsmove_`inout'exempt=n2 irsmove_`inout'agi=agi , by(countyfip)
				
				bys countyfip: assert _N==1
				saveold "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_`param'.dta", replace
			}
			*** End of in- / out-flow loop
			
			* Merge in- and out-flow files
			use "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_inflow.dta", clear
			merge 1:1 countyfip using "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_outflow.dta", nogen
			
			bys countyfip: assert _N==1
			destring countyfip, replace
			
			gen rfrnc_yr = "`datayr'"
			
			lab var countyfip "FIPs county"
			lab var irsmove_INreturn "IRS migration: number of in-flow tax returns (counts)"
			lab var irsmove_INexempt "IRS migration: number of in-flow tax exemptions (persons)"
			lab var irsmove_INagi    "IRS migration: number of in-flow adjusted gross income ($1000)"
			lab var irsmove_OUTreturn "IRS migration: number of out-flow tax returns (counts)"
			lab var irsmove_OUTexempt "IRS migration: number of out-flow tax exemptions (persons)"
			lab var irsmove_OUTagi    "IRS migration: number of out-flow adjusted gross income ($1000)"
			
			compress
			saveold "$Rep_smokelabor/1_build/irs/proc/irs_inflow_outflow_county_annual_`datayr'.dta", replace
			
			* Clean up
			local dfiles : dir "$Rep_smokelabor/1_build/irs/proc" files "*.xls"
			foreach file in `dfiles' {
				erase "$Rep_smokelabor/1_build/irs/proc/`file'"
			}
			erase "$Rep_smokelabor/1_build/irs/proc/countyinflow`datayr'.csv"
			erase "$Rep_smokelabor/1_build/irs/proc/countyoutflow`datayr'.csv"
			erase "$Rep_smokelabor/1_build/irs/proc/stateinflow`datayr'.csv"
			erase "$Rep_smokelabor/1_build/irs/proc/stateoutflow`datayr'.csv"
			erase "$Rep_smokelabor/1_build/irs/proc/`datayr'inmigall.csv"
			erase "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_inflow.dta"
			erase "$Rep_smokelabor/1_build/irs/proc/tmpfile_`datayr'_outflow.dta"
		}
		
	}
	*** End of data year loop
	
	* Append all data
	use "$Rep_smokelabor/1_build/irs/proc/irs_inflow_outflow_county_annual_0506.dta", clear
	local ls_datayr 0607 0708 0809 0910 1011 1112 1213 1314 1415 1516 1617 1718 1819
	foreach datayr in `ls_datayr' {
		append using "$Rep_smokelabor/1_build/irs/proc/irs_inflow_outflow_county_annual_`datayr'.dta"
	}
	*** End of append data year loop
	
	rename rfrnc_yr _rfrnc_yr
	gen rfrnc_yr = .
	replace rfrnc_yr = 2005 if _rfrnc_yr=="0506"
	replace rfrnc_yr = 2006 if _rfrnc_yr=="0607" 
	replace rfrnc_yr = 2007 if _rfrnc_yr=="0708" 
	replace rfrnc_yr = 2008 if _rfrnc_yr=="0809" 
	replace rfrnc_yr = 2009 if _rfrnc_yr=="0910" 
	replace rfrnc_yr = 2010 if _rfrnc_yr=="1011" 
	replace rfrnc_yr = 2011 if _rfrnc_yr=="1112" 
	replace rfrnc_yr = 2012 if _rfrnc_yr=="1213" 
	replace rfrnc_yr = 2013 if _rfrnc_yr=="1314" 
	replace rfrnc_yr = 2014 if _rfrnc_yr=="1415" 
	replace rfrnc_yr = 2015 if _rfrnc_yr=="1516" 
	replace rfrnc_yr = 2016 if _rfrnc_yr=="1617"
	replace rfrnc_yr = 2017 if _rfrnc_yr=="1718"
	replace rfrnc_yr = 2018 if _rfrnc_yr=="1819"
	
	drop _rfrnc_yr
	lab var rfrnc_yr "Reference year"
	
	compress
	saveold "$Rep_smokelabor/1_build/irs/proc/irs_inflow_outflow_county_year.dta", replace
	
	
}